#!/bin/sh
# Copyright (C) 2023 Checkmk GmbH - License: GNU General Public License v2
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
# conditions defined in the file COPYING, which is part of this source code package.

# Basic regular expression for a shebang line: the line starts with "#!/" and
# contains "sh" somewhere after it (e.g. "#!/bin/sh", "#!/usr/bin/env bash").
# No backslash before "#": escaping an ordinary character is undefined in POSIX
# basic regular expressions and newer GNU grep versions warn about a stray "\".
SHEBANG_PATTERN='^#!/.*sh'

usage() {
    # Write the command line synopsis to stderr, then terminate with status 1.
    printf 'usage: %s [--filter]\n\n   --filter: read file candidates from lines of stdin (one file per line)\n' \
        "$(basename "${0}")" >&2
    exit 1
}

get_candidates_from_stdin() {
    # Read candidate paths from stdin (one per line) and write every path that
    # exists to stdout, each terminated by a NUL byte.
    #
    # "IFS=" keeps leading/trailing blanks of a path intact; the extra
    # "[ -n ... ]" also processes a final line that lacks a trailing newline.
    while IFS= read -r line || [ -n "${line}" ]; do
        # gracefully ignore non-existing files
        # ("if" instead of "&&" so a missing last candidate does not turn into
        # a non-zero return status of the whole function)
        if [ -e "${line}" ]; then
            printf '%s\000' "${line}"
        fi
    done
}

_grep_with_excludes() (
    # Run a recursive "grep --files-with-matches" that skips binary files and a
    # fixed set of file types, followed by the caller's own arguments (further
    # options, the pattern and the paths to search).
    #
    # The body is a subshell (parentheses instead of braces) so that the
    # scratch variables and the rewritten positional parameters stay private.

    caller_argc=$#

    # fixed options are appended behind the caller's arguments for now ...
    set -- "$@" \
        --recursive \
        --files-with-matches \
        --binary-files=without-match

    # file types which definitely contain no shell code
    for suffix in \
        Po a bat bazel c cc cmake cmd cpp cs csproj css diff dll exe gif gpg \
        groovy h hpp html ico idx imp info ipp jar java js json log lql m4 \
        map md msi o odt pc pdf pem php pl pm png po proto ps1 puml py pyc \
        pyi rs rst scss sln so svg t tar.bz2 tar.gz tar.xz toml ts txt vbs \
        vcxproj wav whl xml yaml yml zip; do
        set -- "$@" "--exclude=*.${suffix}"
    done
    set -- "$@" "--exclude=Dockerfile" "--exclude=README*"

    # ... and then the caller's arguments are rotated from the front to the
    # back, so grep sees: fixed options, excludes, caller arguments
    while [ "${caller_argc}" -gt 0 ]; do
        set -- "$@" "$1"
        shift
        caller_argc=$((caller_argc - 1))
    done

    grep "$@"
)

get_files_from_filesystem() {
    # Search the tree two directory levels above this script's resolved path
    # for files matching SHEBANG_PATTERN. The "omd" directory is searched in a
    # second pass with its own exclude list; that pass only runs if the first
    # one succeeded.
    cd "$(dirname "$(dirname "$(realpath "$0")")")" || exit $?

    # ---- first pass: everything except omd ----------------------------------
    # autoconf stuff
    set -- \
        '--exclude-dir=autom4te.cache' \
        '--exclude=ar-lib' '--exclude=compile' '--exclude=config.guess' \
        '--exclude=config.status' '--exclude=config.sub' '--exclude=configure' \
        '--exclude=depcomp' '--exclude=install-sh' '--exclude=missing'
    # git stuff
    set -- "$@" '--exclude-dir=.git' '--exclude=.git*'
    # 3rd party C++ libraries
    set -- "$@" \
        '--exclude-dir=asio-*-patched' \
        '--exclude-dir=cpp-httplib-0.13.3' \
        '--exclude-dir=googletest-*-patched'
    # various artifact directories
    set -- "$@" \
        '--exclude-dir=bazel-*' '--exclude-dir=build' '--exclude-dir=external' \
        '--exclude-dir=node_modules' '--exclude-dir=.venv' \
        '--exclude-dir=shared_cargo_folder'
    # docker directories
    set -- "$@" '--exclude-dir=build_user_home'
    # search in omd separately, for more fine tuning
    set -- "$@" '--exclude-dir=omd'

    _grep_with_excludes "$@" "${SHEBANG_PATTERN}" ./* || return $?

    # ---- second pass: omd only ----------------------------------------------
    # just the status quo, feel free to fix stuff
    set -- \
        '--exclude=distro' '--exclude=configure' '--exclude=MKEVENTD*' \
        '--exclude=.f12' '--exclude=AGENT_RECEIVER*' '--exclude=MULTISITE_*'
    set -- "$@" \
        '--exclude-dir=debian' '--exclude-dir=apache-omd' \
        '--exclude-dir=appliance' '--exclude-dir=build' \
        '--exclude-dir=maintenance' '--exclude-dir=nagios' \
        '--exclude-dir=nagvis' '--exclude-dir=navicli' \
        '--exclude-dir=perl-modules' '--exclude-dir=enterprise' \
        '--exclude-dir=omd' '--exclude-dir=pnp4nagios' \
        '--exclude-dir=mk-livestatus'

    _grep_with_excludes "$@" "${SHEBANG_PATTERN}" ./omd/*
}

filter_for_shell_files() {
    # Read NUL-terminated file names from stdin, sort them and print (one per
    # line) those files that match SHEBANG_PATTERN; binary files never match.
    #
    # use "-r" over "--no-run-if-empty" and "-0" over "--null" to work on alpine
    #
    # The pattern is passed via "-e" and followed by "--" so that the file
    # names appended by xargs are never parsed as grep options, even when a
    # name starts with "-".
    sort --zero-terminated |
        xargs -r -0 \
            grep --binary-files=without-match --files-with-matches \
            -e "${SHEBANG_PATTERN}" --
}

main() {
    # Dispatch on the command line:
    #   no argument  -> search the file system
    #   "--filter"   -> filter the candidates given on stdin
    #   anything else -> print the usage message
    case $# in
        0)
            get_files_from_filesystem
            ;;
        1)
            if [ "$1" = "--filter" ]; then
                # in the filtering case, no match by grep is OK!
                get_candidates_from_stdin | filter_for_shell_files || true
            else
                usage
            fi
            ;;
        *)
            usage
            ;;
    esac
}

# Run main unless MK_SOURCE_ONLY is set to a non-empty value (presumably so
# that the file can be sourced for its functions only - confirm with callers).
# An "if" is used instead of "[ ... ] && main" so that the skipped case leaves
# exit status 0 rather than 1, and ":-" keeps the check working under "set -u".
if [ -z "${MK_SOURCE_ONLY:-}" ]; then
    main "$@"
fi
